Replication of Cross-sex mQTL

library(dbplyr)
library(DBI)
ntests <- 126904049
con <- dbConnect(RSQLite::SQLite(), "terre_and_digpd_cis_all_impute_mQTL_9_methy_PC.db")
mQTL_results <- tbl(con, "terre")
ntests_digpd <- 100363748
mQTL_results_digpd <- tbl(con, "digpd")
mQTL_results %>%
  filter(FDR < 0.05) %>%
  summarize(total = n(), cpgs = n_distinct(gene))
mQTL_results %>%
  filter(`p-value` < (0.05 / ntests)) %>%
  summarize(total = n(), cpgs = n_distinct(gene))

mQTL_results_digpd %>%
  filter(FDR < 0.05) %>%
  summarize(total = n(), cpgs = n_distinct(gene))
mQTL_results_digpd %>%
  filter(`p-value` < (0.05 / ntests)) %>%
  summarize(total = n(), cpgs = n_distinct(gene))

library(dbplot)
mQTL_results %>% dbplot_histogram(`p-value`)
# qq(mQTL_results$`p-value`)

Checking replication

\(\pi_1\) statistics

to_check_replication <- mQTL_results_digpd %>%
  inner_join(mQTL_results %>% filter(FDR < 0.05), by = c("SNP", "gene"), copy = TRUE) %>%
  collect() %>% as.data.table()

pi0res <- pi0est(na.omit(as.numeric(to_check_replication$`p-value.x`)), lambda = seq(0, 0.245, 0.005), pi0.method = "bootstrap")
1 - pi0res$pi0
plot(pi0res$pi0.lambda, pi0res$lambda, xlab = bquote(pi[0]), ylab = bquote(lambda))
abline(v = pi0res$pi0)
qvalres <- qvalue_truncp(na.omit(as.numeric(to_check_replication$`p-value.x`)))
1 - qvalres$pi0
abline(v = qvalres$pi0)
pi0est(as.numeric(to_check_replication$`p-value.x`) / 0.25)

Colocalization with parkinson’s loci and overlap with ewas hits

load("../prs_analyses/prs_nalls_cross_w_sex_stratified.RData")
manifest <- IlluminaHumanMethylationEPICanno.ilm10b4.hg19::Other %>%
  as.data.frame() %>%
  rownames_to_column(var = "name")

positions <- IlluminaHumanMethylationEPICanno.ilm10b4.hg19::Locations %>%
  as.data.frame() %>%
  rownames_to_column(var = "name")

cross-sex GWAS

cross_coloc <- fread("cross_mqtl_cross_sumstats_pd_snp_colocalization_ph4.txt.gz")
male_coloc <- fread("male_mqtl_cross_sumstats_pd_snp_colocalization_ph4.txt.gz")
female_coloc <- fread("female_mqtl_cross_sumstats_pd_snp_colocalization_ph4.txt.gz")

Cross-sex mQTL

sig_bonf_cross <- top_prs_hits %>% filter(p.adjust(P.Value,method="BH") < 0.05)
cross_coloc %>%
  filter(PP.H4.abf > 0.9,probe %in% sig_bonf_cross$ID) %>%
  group_by(probe) %>%
  slice(which.max(PP.H4.abf)) %>%
  left_join(manifest,by = c("probe"="name"))

cross_coloc_probes <- cross_coloc %>%
  filter(PP.H4.abf > 0.9,probe %in% sig_bonf_cross$ID) %>%
  group_by(probe) %>%
  slice(which.max(PP.H4.abf)) %>%
  left_join(manifest,by = c("probe"="name")) %>% select(probe)

cross_coloc %>%
  filter(PP.H4.abf < 0.9,probe %in% sig_bonf_cross$ID) %>%
  group_by(probe) %>%
  slice(which.max(PP.H4.abf)) %>%
  left_join(manifest,by = c("probe"="name"))

cross_coloc %>%
  filter(PP.H4.abf > 0.9,!probe %in% sig_bonf_cross$ID) %>%
  group_by(probe) %>%
  slice(which.max(PP.H4.abf)) %>%
  left_join(manifest,by = c("probe"="name"))
(non_colocalized_cross <- sig_bonf_cross %>% left_join(manifest,by=c("ID" = "name"))  %>% left_join(positions,by=c("ID"="name")) %>% filter(!ID %in% cross_coloc_probes$probe) %>% arrange(chr,pos)  %>% select(ID,UCSC_RefGene_Name,chr,pos))

non_colocalized_cross %>% count(gsub(";.*","",UCSC_RefGene_Name))

Male mQTL

sig_bonf_male <- top_male_prs_hits %>% filter(P.Value < (0.05 / n()))
male_coloc %>%
  filter(PP.H4.abf > 0.9,probe %in% sig_bonf_male$ID) %>%
  group_by(probe) %>%
  slice(which.max(PP.H4.abf)) %>%
  left_join(manifest,by = c("probe"="name"))

male_coloc %>%
  filter(PP.H4.abf < 0.9,probe %in% sig_bonf_male$ID) %>%
  group_by(probe) %>%
  slice(which.max(PP.H4.abf)) %>%
  left_join(manifest,by = c("probe"="name"))

male_coloc %>%
  filter(PP.H4.abf > 0.9,!probe %in% sig_bonf_male$ID) %>%
  group_by(probe) %>%
  slice(which.max(PP.H4.abf)) %>%
  left_join(manifest,by = c("probe"="name"))

Female mQTL

sig_bonf_female <- top_female_prs_hits %>% filter(P.Value < (0.05 / n()))
female_coloc %>%
  filter(PP.H4.abf > 0.9,probe %in% sig_bonf_female$ID) %>%
  group_by(probe) %>%
  slice(which.max(PP.H4.abf)) %>%
  left_join(manifest,by = c("probe"="name"))

female_coloc %>%
  filter(PP.H4.abf < 0.9,probe %in% sig_bonf_female$ID) %>%
  group_by(probe) %>%
  slice(which.max(PP.H4.abf)) %>%
  left_join(manifest,by = c("probe"="name"))

female_coloc %>%
  filter(PP.H4.abf > 0.9,!probe %in% sig_bonf_female$ID) %>%
  group_by(probe) %>%
  slice(which.max(PP.H4.abf)) %>%
  left_join(manifest,by = c("probe"="name"))

Sex stratified GWAS colocalization

load("../prs_analyses/prs_blau_sex_stratified.RData")
male_stratified_coloc <- fread("male_mqtl_male_sumstats_pd_snp_colocalization_ph4.txt.gz")
female_stratified_coloc <- fread("female_mqtl_female_sumstats_pd_snp_colocalization_ph4.txt.gz")

See Script. ### Male results at CpG level

sig_FDR_male <- top_male_ewas %>% filter(adj.P.Val < 0.25)
male_stratified_coloc %>%
  filter(PP.H4.abf > 0.9,probe %in% sig_FDR_male$ID) %>%
  group_by(probe) %>%
  slice(which.max(PP.H4.abf)) %>%
  left_join(manifest,by = c("probe"="name")) %>%
  select(contains("PP.H"),probe,locus,locus_snp,UCSC_RefGene_Name)

male_stratified_coloc %>%
  filter(PP.H4.abf < 0.9,probe %in% sig_FDR_male$ID) %>%
  group_by(probe) %>%
  slice(which.max(PP.H4.abf)) %>%
  left_join(manifest,by = c("probe"="name"))

male_stratified_coloc %>%
  filter(PP.H4.abf > 0.9,!probe %in% sig_FDR_male$ID) %>%
  group_by(probe) %>%
  slice(which.max(PP.H4.abf)) %>%
  left_join(manifest,by = c("probe"="name"))
NA
NA
sig_FDR_male %>%
  group_by(ID) %>%
  left_join(manifest,by = c("ID"="name")) %>%
  left_join(positions,by= c("ID"="name")) %>%
  select(ID,chr,pos,UCSC_RefGene_Name) %>%
  arrange(chr,pos)
sig_FDR_female <- top_female_ewas %>% filter(adj.P.Val < 0.25)
female_stratified_coloc %>%
  filter(PP.H4.abf > 0.9,probe %in% sig_FDR_female$ID) %>%
  group_by(probe) %>%
  slice(which.max(PP.H4.abf)) %>%
  left_join(manifest,by = c("probe"="name"))

female_stratified_coloc %>%
  filter(PP.H4.abf < 0.9,probe %in% sig_FDR_female$ID) %>%
  group_by(probe) %>%
  slice(which.max(PP.H4.abf)) %>%
  left_join(manifest,by = c("probe"="name"))

female_stratified_coloc %>%
  filter(PP.H4.abf > 0.9,!probe %in% sig_FDR_female$ID) %>%
  group_by(probe) %>%
  slice(which.max(PP.H4.abf)) %>%
  left_join(manifest,by = c("probe"="name"))
(sig_summary_FDR_female <- sig_FDR_female %>%
  group_by(ID) %>%
  left_join(manifest,by = c("ID"="name")) %>%
  left_join(positions,by= c("ID"="name")) %>%
  select(ID,chr,pos,UCSC_RefGene_Name)%>%
  arrange(as.numeric(gsub("chr","",chr)),pos) )
fdr_female_gostres<- gprofiler2::gost(gsub(";.*","",sig_summary_FDR_female$UCSC_RefGene_Name),organism = "hsapiens",custom_bg = gsub(";.*","",manifest$UCSC_RefGene_Name),correction_method = "fdr",user_threshold = 0.1)
Detected custom background input, domain scope is set to 'custom'
gprofiler2::gostplot(fdr_female_gostres)
`gather_()` was deprecated in tidyr 1.2.0.
Please use `gather()` instead.

male vs female overlap

res <- list("male"=unique(male_results[PP.H4.abf > 0.9]$probe),"female" = unique(female_results[PP.H4.abf > 0.9]$probe))
sex_shared <- Reduce(function(a,b)a[a %in% b],res)
male <- res$male[!res$male %in% res$female]
female <- res$female[!res$female %in% res$male]
plt <- VennDiagram::venn.diagram(res,filename=NULL,cex = 1,
  cat.cex = 1,
  lwd = 2)
grid.newpage()
grid.draw(plt)

Plot overlap

plot_probes_sex <- function(probe_set)
for(cur_probe in probe_set){
  to_plot_male <- male_results_snp[probe == cur_probe]  %>%
    mutate(POS = as.numeric(gsub(".*:","",snp))) %>%
    pivot_longer(c("pvalues.df1","pvalues.df2"),names_to="Var",values_to="Val") %>%
    mutate(Var=recode(Var,pvalues.df1="mQTL",pvalues.df2="GWAS"),Sex="Male")
  to_plot_female <- female_results_snp[probe == cur_probe]  %>%
    mutate(POS = as.numeric(gsub(".*:","",snp))) %>%
    pivot_longer(c("pvalues.df1","pvalues.df2"),names_to="Var",values_to="Val") %>%
    mutate(Var=recode(Var,pvalues.df1="mQTL",pvalues.df2="GWAS"),Sex="Female")
  p<- ggplot(rbind(to_plot_male,to_plot_female),aes(POS,-log10(Val),color=SNP.PP.H4)) +
    geom_point()+
    scale_color_fermenter(palette="Spectral") +
    facet_wrap(Var~Sex)+
    labs(y="-log10 P")+
    ggtitle(cur_probe)
  print(p)
}

Shared

plot_probes_sex(sex_shared)

Male

plot_probes_sex(male)

Female

plot_probes_sex(female)

Mendelian Randomization

library(MendelianRandomization)
library(LDlinkR)
library(glue)
run_mr_per_cpg <- function(mQTL,cur_gwas){
  require(MendelianRandomization)
  require(LDlinkR)
  require(glue)
  mQTL <- mQTL[grepl("rs",SNP)]
  cur_gwas <- cur_gwas[mQTL$SNP, on = "SNP", nomatch=0]
  cur_mQTL <- mQTL[cur_gwas$SNP, on = "SNP", nomatch=0]
  mqtl_rep <- !duplicated(signif(cur_mQTL$beta,1))
  cur_gwas <- cur_gwas[mqtl_rep]
  cur_mQTL <- cur_mQTL[mqtl_rep]
  if(nrow(cur_mQTL) < 1){
    return("No overlapping loci")
  }else if(nrow(cur_mQTL) == 1){
    mr_nocor_input <- mr_input(
      bx = cur_mQTL$beta,
      bxse = cur_mQTL$beta/cur_mQTL$`t-stat` ,
      by = cur_gwas$b,
      byse = cur_gwas$se
    )
    IVWObject <- try(
      mr_ivw(
        mr_nocor_input,
        correl = FALSE,
        distribution = "normal",
        alpha = 0.05
      ),
      silent =  FALSE
    )
  }else{
    attempt <- 1
    cor_table <- NULL
    print(nrow(cur_mQTL))
    while(is.null(cor_table) & attempt < 5){
      try(cor_table <-
        LDmatrix(
          snps=cur_mQTL$SNP,
          pop="EUR",
          genome_build = "grch37",
          token = gsub("\n","",readr::read_file("~/analysis_of_pd_dnam/ldlink_token.txt")),
        ),
        silent = FALSE
      )
      if(is.null(cor_table)){
        Sys.sleep(5)
        attempt <- attempt + 1
      }
    }
    cor_mat <- as.matrix(cor_table[,-c(1)])
    rownames(cor_mat) <- as.character(cor_table$RS_number)
    cur_mQTL <- cur_mQTL[rownames(cor_mat), on="SNP"]
    cur_gwas <- cur_gwas[rownames(cor_mat), on="SNP"]
    if(nrow(cur_mQTL) < 1){
      return("No overlapping loci after corr missing")
    }
    mr_cor_input <- mr_input(
          bx = cur_mQTL$beta,
          bxse = cur_mQTL$beta/cur_mQTL$`t-stat` ,
          by = cur_gwas$b,
          byse = cur_gwas$se,
          correlation = cor_mat
        )
    IVWObject <- try(
      mr_ivw(
        mr_cor_input,
        correl = TRUE,
        distribution = "normal",
        alpha = 0.05
      ),
      silent =  FALSE
    )
  }

  if(class(IVWObject) == 'try-error'){
    return(list(msg=glue("{nrow(cur_mQTL)} mQTL, MR not fit"),obj=mr_cor_input))
  }else{
    return(IVWObject)
  }
}

run MR per colocalized CpG site

cur_GWAS <- fread("~/nalls_PD.QC.gz",key="SNP")
|--------------------------------------------------|
|==================================================|
cross_mQTL <- fread("all_2022_prs_mQTL.txt", key="SNP")
|--------------------------------------------------|
|==================================================|
cur_GWAS <- cur_GWAS[p < 5e-8]
cross_mQTL <- cross_mQTL[`p-value` < 5e-8]
cross_mr_res <- list()
for(probe in unique(cross_coloc[PP.H4.abf > 0.9]$probe)){
  print(probe)
  cross_mr_res[[probe]] <- run_mr_per_cpg(cross_mQTL[gene == probe],cur_GWAS)
}
[1] "cg06632027"
[1] 3

LDlink server is working...
[1] "cg26578617"
[1] 4

LDlink server is working...
[1] "cg03706283"
[1] "cg18586891"
[1] "cg00916973"
[1] "cg01341218"
[1] 3

LDlink server is working...

Error in solve.default(omega) :
  le système est numériquement singulier : conditionnement de la réciproque = 2.63649e-17
[1] "cg03452365"
[1] "cg05159804"
[1] 3

LDlink server is working...
[1] "cg12609785"
[1] 2

LDlink server is working...
[1] "cg13438899"
[1] 3

LDlink server is working...
[1] "cg17117718"
[1] 2

LDlink server is working...
[1] "cg17911788"
[1] 3

LDlink server is working...
[1] "cg19943578"
[1] "cg23659289"
[1] "cg24477401"
[1] 2

LDlink server is working...
[1] "cg12407526"
[1] 3

LDlink server is working...
[1] "cg03867702"
[1] 2

LDlink server is working...
[1] "cg08487618"
[1] "cg10055458"
[1] 2

LDlink server is working...
[1] "cg22659356"
[1] 2

LDlink server is working...
[1] "cg15420606"
[1] "cg17144790"
[1] "cg18816397"
[1] 2

LDlink server is working...
[1] "cg02220965"
[1] "cg03540520"
[1] "cg12441436"
[1] "cg19290994"
[1] "cg27355006"
[1] "cg03392386"
[1] 2

LDlink server is working...
[1] "cg03642635"
[1] "cg12362517"
[1] 2

LDlink server is working...
[1] "cg19961173"
[1] 3

LDlink server is working...
[1] "cg15199181"
[1] "cg19026029"
[1] "cg23670519"
[1] "cg20311846"
[1] "cg24086068"
[1] "cg27417997"
[1] "cg02642017"
[1] "cg09236008"
[1] 4

LDlink server is working...
[1] "cg09755872"
[1] 4

LDlink server is working...
[1] "cg12872830"
[1] 2

LDlink server is working...
[1] "cg14366619"
[1] 3

LDlink server is working...
[1] "cg18081818"
[1] 3

LDlink server is working...
[1] "cg10428661"
[1] "cg19815271"
[1] "cg26099468"
[1] 2

LDlink server is working...

Error in solve.default(omega) :
  le système est numériquement singulier : conditionnement de la réciproque = 4.42528e-17
saveRDS(cross_mr_res,file="cross_mr_res.RData")

Summary Mendelian Randomization

cross_smr <- fread("~/all_2022_dnam_pd.smr")
cross_smr$set <- "Cross-sex GWAS, cross-sex mQTL"
male_nalls_smr <- fread("~/male_nalls_2022_dnam_pd.smr")
male_nalls_smr$set <- "Cross-sex GWAS, male mQTL"
female_nalls_smr <- fread("~/female_nalls_2022_dnam_pd.smr")
female_nalls_smr$set <- "Cross-sex GWAS, female mQTL"
male_blau_smr <- fread("~/male_blauwendraat_2022_dnam_pd.smr")
male_blau_smr$set <- "Male GWAS, male mQTL"
female_blau_smr <- fread("~/female_blauwendraat_2022_dnam_pd.smr")
female_blau_smr$set <- "Female GWAS, female mQTL"
(cross_smr_hits <- cross_smr[p_SMR < (0.05/ .N)])
(cross_smr_hits_heidi <- cross_smr[p_SMR < (0.05/ .N) & p_HEIDI > 0.01])

(male_nalls_smr_hits <- male_nalls_smr[p_SMR < (0.05/ .N)])
(male_nalls_smr_hits_heidi <- male_nalls_smr[p_SMR < (0.05/ .N) & p_HEIDI > 0.01])

(female_nalls_smr_hits <- female_nalls_smr[p_SMR < (0.05/ .N)])
(female_nalls_smr_hits_heidi <- female_nalls_smr[p_SMR < (0.05/ .N) & p_HEIDI > 0.01])

(male_blau_smr_hits <- male_blau_smr[p_SMR < (0.05/ .N)])
(male_blau_smr_hits_heidi <- male_blau_smr[p_SMR < (0.05/ .N) & p_HEIDI > 0.01])

(female_blau_smr_hits <- female_blau_smr[p_SMR < (0.05/ .N)])
(female_blau_smr_hits_heidi <- female_blau_smr[p_SMR < (0.05/ .N) & p_HEIDI > 0.01])
display_venn <- function(x, ...) {
  library(VennDiagram)
  grid.newpage()
  venn_object <- venn.diagram(x, filename = NULL, ...)
  grid.draw(venn_object)
}

display_venn(list(
  cross=cross_smr_hits$probeID,
  male=male_nalls_smr_hits$probeID,
  female=female_nalls_smr_hits$probeID
), fill = c("gray80", "lightblue", "lightpink"))


display_venn(list(
  cross=cross_smr_hits_heidi$probeID,
  male=male_nalls_smr_hits_heidi$probeID,
  female=female_nalls_smr_hits_heidi$probeID
), fill = c("gray80", "lightblue", "lightpink"))


display_venn(list(
  cross=cross_smr_hits$probeID,
  male=male_blau_smr_hits$probeID,
  female=female_blau_smr_hits$probeID
), fill = c("gray80", "lightblue", "lightpink"))


display_venn(list(
  cross=cross_smr_hits_heidi$probeID,
  male=male_blau_smr_hits_heidi$probeID,
  female=female_blau_smr_hits_heidi$probeID
), fill = c("gray80", "lightblue", "lightpink"))



display_venn(list(
  male_nalls=male_nalls_smr_hits_heidi$probeID,
  female_nalls=female_nalls_smr_hits_heidi$probeID,
  male_blau=male_blau_smr_hits$probeID,
  female_blau=female_blau_smr_hits$probeID
), fill = c("lightblue2", "lightpink2","lightblue4","lightpink4"))


display_venn(list(
  male_nalls=male_nalls_smr_hits_heidi$probeID,
  female_nalls=female_nalls_smr_hits_heidi$probeID,
  male_blau=male_blau_smr_hits_heidi$probeID,
  female_blau=female_blau_smr_hits_heidi$probeID
), fill = c("lightblue2", "lightpink2","lightblue4","lightpink4"))

to_plot <- rbind(cross_smr_hits,male_nalls_smr_hits,female_nalls_smr_hits, male_blau_smr_hits,female_blau_smr_hits)
ggplot(to_plot %>% mutate(`HEIDI Test`=ifelse(p_HEIDI > 0.01,"Passed","Failed")) %>%na.omit(),aes(set,fill=`HEIDI Test`))+
  geom_bar() +
  scale_fill_manual(values=c("Passed"="gray20","Failed"="gray70"))+
  geom_text(stat="count",position="stack",mapping=aes(label=..count..),vjust=-0.25)+
  coord_cartesian(ylim=c(0,75)) +
  labs(y="Number of DNAm-mediated\nAssociations",x="")+
  theme_minimal() +
  theme(axis.text.x=element_text(angle=60,hjust=1,vjust=1))

---
title: "Post mQTL analysis Terre"
output: html_notebook
---

```{r setup, include=FALSE}
library(coloc)
library(data.table)
library(parallel)
library(qqman)
library(qvalue)
library(readxl)
library(Gviz)
library(tidyverse)
knitr::opts_chunk$set(echo = TRUE, cache = TRUE)
knitr::opts_chunk$set(cache = TRUE)
```
## Replication of Cross-sex mQTL
```{r}
library(dbplyr)
library(DBI)
ntests <- 126904049
con <- dbConnect(RSQLite::SQLite(), "terre_and_digpd_cis_all_impute_mQTL_9_methy_PC.db")
mQTL_results <- tbl(con, "terre")
ntests_digpd <- 100363748
mQTL_results_digpd <- tbl(con, "digpd")
```
```{r}
mQTL_results %>%
  filter(FDR < 0.05) %>%
  summarize(total = n(), cpgs = n_distinct(gene))
mQTL_results %>%
  filter(`p-value` < (0.05 / ntests)) %>%
  summarize(total = n(), cpgs = n_distinct(gene))

mQTL_results_digpd %>%
  filter(FDR < 0.05) %>%
  summarize(total = n(), cpgs = n_distinct(gene))
mQTL_results_digpd %>%
  filter(`p-value` < (0.05 / ntests)) %>%
  summarize(total = n(), cpgs = n_distinct(gene))

library(dbplot)
mQTL_results %>% dbplot_histogram(`p-value`)
# qq(mQTL_results$`p-value`)
```

### Checking replication
$\pi_1$ statistics
```{r}
to_check_replication <- mQTL_results_digpd %>%
  inner_join(mQTL_results %>% filter(FDR < 0.05), by = c("SNP", "gene"), copy = TRUE) %>%
  collect() %>% as.data.table()

pi0res <- pi0est(na.omit(as.numeric(to_check_replication$`p-value.x`)), lambda = seq(0, 0.245, 0.005), pi0.method = "bootstrap")
1 - pi0res$pi0
plot(pi0res$pi0.lambda, pi0res$lambda, xlab = bquote(pi[0]), ylab = bquote(lambda))
abline(v = pi0res$pi0)
qvalres <- qvalue_truncp(na.omit(as.numeric(to_check_replication$`p-value.x`)))
1 - qvalres$pi0
abline(v = qvalres$pi0)
pi0est(as.numeric(to_check_replication$`p-value.x`) / 0.25)
```

# Colocalization with parkinson's loci and overlap with ewas hits
```{r}
load("../prs_analyses/prs_nalls_cross_w_sex_stratified.RData")
manifest <- IlluminaHumanMethylationEPICanno.ilm10b4.hg19::Other %>%
  as.data.frame() %>%
  rownames_to_column(var = "name")

positions <- IlluminaHumanMethylationEPICanno.ilm10b4.hg19::Locations %>%
  as.data.frame() %>%
  rownames_to_column(var = "name")
```

## cross-sex GWAS
```{r}
cross_coloc <- fread("cross_mqtl_cross_sumstats_pd_snp_colocalization_ph4.txt.gz")
male_coloc <- fread("male_mqtl_cross_sumstats_pd_snp_colocalization_ph4.txt.gz")
female_coloc <- fread("female_mqtl_cross_sumstats_pd_snp_colocalization_ph4.txt.gz")
```
### Cross-sex mQTL
```{r}
sig_bonf_cross <- top_prs_hits %>% filter(p.adjust(P.Value,method="BH") < 0.05)
cross_coloc %>%
  filter(PP.H4.abf > 0.9,probe %in% sig_bonf_cross$ID) %>%
  group_by(probe) %>%
  slice(which.max(PP.H4.abf)) %>%
  left_join(manifest,by = c("probe"="name"))

cross_coloc_probes <- cross_coloc %>%
  filter(PP.H4.abf > 0.9,probe %in% sig_bonf_cross$ID) %>%
  group_by(probe) %>%
  slice(which.max(PP.H4.abf)) %>%
  left_join(manifest,by = c("probe"="name")) %>% select(probe)

cross_coloc %>%
  filter(PP.H4.abf < 0.9,probe %in% sig_bonf_cross$ID) %>%
  group_by(probe) %>%
  slice(which.max(PP.H4.abf)) %>%
  left_join(manifest,by = c("probe"="name"))

cross_coloc %>%
  filter(PP.H4.abf > 0.9,!probe %in% sig_bonf_cross$ID) %>%
  group_by(probe) %>%
  slice(which.max(PP.H4.abf)) %>%
  left_join(manifest,by = c("probe"="name"))
```
```{r}
(non_colocalized_cross <- sig_bonf_cross %>% left_join(manifest,by=c("ID" = "name"))  %>% left_join(positions,by=c("ID"="name")) %>% filter(!ID %in% cross_coloc_probes$probe) %>% arrange(chr,pos)  %>% select(ID,UCSC_RefGene_Name,chr,pos))

non_colocalized_cross %>% count(gsub(";.*","",UCSC_RefGene_Name))
```


### Male mQTL
```{r}
sig_bonf_male <- top_male_prs_hits %>% filter(P.Value < (0.05 / n()))
male_coloc %>%
  filter(PP.H4.abf > 0.9,probe %in% sig_bonf_male$ID) %>%
  group_by(probe) %>%
  slice(which.max(PP.H4.abf)) %>%
  left_join(manifest,by = c("probe"="name"))

male_coloc %>%
  filter(PP.H4.abf < 0.9,probe %in% sig_bonf_male$ID) %>%
  group_by(probe) %>%
  slice(which.max(PP.H4.abf)) %>%
  left_join(manifest,by = c("probe"="name"))

male_coloc %>%
  filter(PP.H4.abf > 0.9,!probe %in% sig_bonf_male$ID) %>%
  group_by(probe) %>%
  slice(which.max(PP.H4.abf)) %>%
  left_join(manifest,by = c("probe"="name"))
```

### Female mQTL
```{r}
sig_bonf_female <- top_female_prs_hits %>% filter(P.Value < (0.05 / n()))
female_coloc %>%
  filter(PP.H4.abf > 0.9,probe %in% sig_bonf_female$ID) %>%
  group_by(probe) %>%
  slice(which.max(PP.H4.abf)) %>%
  left_join(manifest,by = c("probe"="name"))

female_coloc %>%
  filter(PP.H4.abf < 0.9,probe %in% sig_bonf_female$ID) %>%
  group_by(probe) %>%
  slice(which.max(PP.H4.abf)) %>%
  left_join(manifest,by = c("probe"="name"))

female_coloc %>%
  filter(PP.H4.abf > 0.9,!probe %in% sig_bonf_female$ID) %>%
  group_by(probe) %>%
  slice(which.max(PP.H4.abf)) %>%
  left_join(manifest,by = c("probe"="name"))
```

## Sex stratified GWAS colocalization
```{r}
load("../prs_analyses/prs_blau_sex_stratified.RData")
male_stratified_coloc <- fread("male_mqtl_male_sumstats_pd_snp_colocalization_ph4.txt.gz")
female_stratified_coloc <- fread("female_mqtl_female_sumstats_pd_snp_colocalization_ph4.txt.gz")
```

See [Script](run_colocalization.R).
### Male results at CpG level
```{r}
sig_FDR_male <- top_male_ewas %>% filter(adj.P.Val < 0.25)
male_stratified_coloc %>%
  filter(PP.H4.abf > 0.9,probe %in% sig_FDR_male$ID) %>%
  group_by(probe) %>%
  slice(which.max(PP.H4.abf)) %>%
  left_join(manifest,by = c("probe"="name")) %>%
  select(contains("PP.H"),probe,locus,locus_snp,UCSC_RefGene_Name)

male_stratified_coloc %>%
  filter(PP.H4.abf < 0.9,probe %in% sig_FDR_male$ID) %>%
  group_by(probe) %>%
  slice(which.max(PP.H4.abf)) %>%
  left_join(manifest,by = c("probe"="name"))

male_stratified_coloc %>%
  filter(PP.H4.abf > 0.9,!probe %in% sig_FDR_male$ID) %>%
  group_by(probe) %>%
  slice(which.max(PP.H4.abf)) %>%
  left_join(manifest,by = c("probe"="name"))


```
```{r}
sig_FDR_male %>%
  group_by(ID) %>%
  left_join(manifest,by = c("ID"="name")) %>% 
  left_join(positions,by= c("ID"="name")) %>% 
  select(ID,chr,pos,UCSC_RefGene_Name) %>% 
  arrange(chr,pos)
```

```{r}
sig_FDR_female <- top_female_ewas %>% filter(adj.P.Val < 0.25)
female_stratified_coloc %>%
  filter(PP.H4.abf > 0.9,probe %in% sig_FDR_female$ID) %>%
  group_by(probe) %>%
  slice(which.max(PP.H4.abf)) %>%
  left_join(manifest,by = c("probe"="name"))

female_stratified_coloc %>%
  filter(PP.H4.abf < 0.9,probe %in% sig_FDR_female$ID) %>%
  group_by(probe) %>%
  slice(which.max(PP.H4.abf)) %>%
  left_join(manifest,by = c("probe"="name"))

female_stratified_coloc %>%
  filter(PP.H4.abf > 0.9,!probe %in% sig_FDR_female$ID) %>%
  group_by(probe) %>%
  slice(which.max(PP.H4.abf)) %>%
  left_join(manifest,by = c("probe"="name"))
```
```{r}
(sig_summary_FDR_female <- sig_FDR_female %>%
  group_by(ID) %>%
  left_join(manifest,by = c("ID"="name")) %>% 
  left_join(positions,by= c("ID"="name")) %>% 
  select(ID,chr,pos,UCSC_RefGene_Name)%>%
  arrange(as.numeric(gsub("chr","",chr)),pos) )
```
```{r}
fdr_female_gostres<- gprofiler2::gost(gsub(";.*","",sig_summary_FDR_female$UCSC_RefGene_Name),organism = "hsapiens",custom_bg = gsub(";.*","",manifest$UCSC_RefGene_Name),correction_method = "fdr",user_threshold = 0.1)
gprofiler2::gostplot(fdr_female_gostres)
```

### male vs female overlap

```{r}
res <- list("male"=unique(male_results[PP.H4.abf > 0.9]$probe),"female" = unique(female_results[PP.H4.abf > 0.9]$probe))
sex_shared <- Reduce(function(a,b)a[a %in% b],res)
male <- res$male[!res$male %in% res$female]
female <- res$female[!res$female %in% res$male]
plt <- VennDiagram::venn.diagram(res,filename=NULL,cex = 1,
  cat.cex = 1,
  lwd = 2)
grid.newpage()
grid.draw(plt)
```
## Plot overlap
```{r}
plot_probes_sex <- function(probe_set)
for(cur_probe in probe_set){
  to_plot_male <- male_results_snp[probe == cur_probe]  %>%
    mutate(POS = as.numeric(gsub(".*:","",snp))) %>%
    pivot_longer(c("pvalues.df1","pvalues.df2"),names_to="Var",values_to="Val") %>%
    mutate(Var=recode(Var,pvalues.df1="mQTL",pvalues.df2="GWAS"),Sex="Male")
  to_plot_female <- female_results_snp[probe == cur_probe]  %>%
    mutate(POS = as.numeric(gsub(".*:","",snp))) %>%
    pivot_longer(c("pvalues.df1","pvalues.df2"),names_to="Var",values_to="Val") %>%
    mutate(Var=recode(Var,pvalues.df1="mQTL",pvalues.df2="GWAS"),Sex="Female")
  p<- ggplot(rbind(to_plot_male,to_plot_female),aes(POS,-log10(Val),color=SNP.PP.H4)) +
    geom_point()+
    scale_color_fermenter(palette="Spectral") +
    facet_wrap(Var~Sex)+
    labs(y="-log10 P")+
    ggtitle(cur_probe)
  print(p)
}
```
### Shared
```{r}
plot_probes_sex(sex_shared)
```

### Male
```{r}
plot_probes_sex(male)
```
### Female
```{r}
plot_probes_sex(female)
```
## Mendelian Randomization
```{r}
library(MendelianRandomization)
library(LDlinkR)
library(glue)
run_mr_per_cpg <- function(mQTL,cur_gwas){
  require(MendelianRandomization)
  require(LDlinkR)
  require(glue)
  mQTL <- mQTL[grepl("rs",SNP)]
  cur_gwas <- cur_gwas[mQTL$SNP, on = "SNP", nomatch=0]
  cur_mQTL <- mQTL[cur_gwas$SNP, on = "SNP", nomatch=0]
  mqtl_rep <- !duplicated(signif(cur_mQTL$beta,1))
  cur_gwas <- cur_gwas[mqtl_rep]
  cur_mQTL <- cur_mQTL[mqtl_rep]
  if(nrow(cur_mQTL) < 1){
    return("No overlapping loci")
  }else if(nrow(cur_mQTL) == 1){
    mr_nocor_input <- mr_input(
      bx = cur_mQTL$beta,
      bxse = cur_mQTL$beta/cur_mQTL$`t-stat` ,
      by = cur_gwas$b,
      byse = cur_gwas$se
    )
    IVWObject <- try(
      mr_ivw(
        mr_nocor_input,
        correl = FALSE,
        distribution = "normal",
        alpha = 0.05
      ),
      silent =  FALSE
    )
  }else{
    attempt <- 1
    cor_table <- NULL
    print(nrow(cur_mQTL))
    while(is.null(cor_table) & attempt < 5){
      try(cor_table <-
        LDmatrix(
          snps=cur_mQTL$SNP,
          pop="EUR",
          genome_build = "grch37",
          token = gsub("\n","",readr::read_file("~/analysis_of_pd_dnam/ldlink_token.txt")),
        ),
        silent = FALSE
      )
      if(is.null(cor_table)){
        Sys.sleep(5)
        attempt <- attempt + 1
      }
    }
    cor_mat <- as.matrix(cor_table[,-c(1)])
    rownames(cor_mat) <- as.character(cor_table$RS_number)
    cur_mQTL <- cur_mQTL[rownames(cor_mat), on="SNP"]
    cur_gwas <- cur_gwas[rownames(cor_mat), on="SNP"]
    if(nrow(cur_mQTL) < 1){
      return("No overlapping loci after corr missing")
    }
    mr_cor_input <- mr_input(
          bx = cur_mQTL$beta,
          bxse = cur_mQTL$beta/cur_mQTL$`t-stat` ,
          by = cur_gwas$b,
          byse = cur_gwas$se,
          correlation = cor_mat
        )
    IVWObject <- try(
      mr_ivw(
        mr_cor_input,
        correl = TRUE,
        distribution = "normal",
        alpha = 0.05
      ),
      silent =  FALSE
    )
  }

  if(class(IVWObject) == 'try-error'){
    return(list(msg=glue("{nrow(cur_mQTL)} mQTL, MR not fit"),obj=mr_cor_input))
  }else{
    return(IVWObject)
  }
}
```

#### run MR per colocalized CpG site
```{r}
cur_GWAS <- fread("~/nalls_PD.QC.gz",key="SNP")
cross_mQTL <- fread("all_2022_prs_mQTL.txt", key="SNP")
```
```{r}
cur_GWAS <- cur_GWAS[p < 5e-8]
cross_mQTL <- cross_mQTL[`p-value` < 5e-8]
cross_mr_res <- list()
for(probe in unique(cross_coloc[PP.H4.abf > 0.9]$probe)){
  print(probe)
  cross_mr_res[[probe]] <- run_mr_per_cpg(cross_mQTL[gene == probe],cur_GWAS)
}
```
```{r}
saveRDS(cross_mr_res,file="cross_mr_res.RData")
```
## Summary Mendelian Randomization
```{r}
cross_smr <- fread("~/all_2022_dnam_pd.smr")
cross_smr$set <- "Cross-sex GWAS, cross-sex mQTL"
male_nalls_smr <- fread("~/male_nalls_2022_dnam_pd.smr")
male_nalls_smr$set <- "Cross-sex GWAS, male mQTL"
female_nalls_smr <- fread("~/female_nalls_2022_dnam_pd.smr")
female_nalls_smr$set <- "Cross-sex GWAS, female mQTL"
male_blau_smr <- fread("~/male_blauwendraat_2022_dnam_pd.smr")
male_blau_smr$set <- "Male GWAS, male mQTL"
female_blau_smr <- fread("~/female_blauwendraat_2022_dnam_pd.smr")
female_blau_smr$set <- "Female GWAS, female mQTL"
```

```{r}
(cross_smr_hits <- cross_smr[p_SMR < (0.05/ .N)])
(cross_smr_hits_heidi <- cross_smr[p_SMR < (0.05/ .N) & p_HEIDI > 0.01])

(male_nalls_smr_hits <- male_nalls_smr[p_SMR < (0.05/ .N)])
(male_nalls_smr_hits_heidi <- male_nalls_smr[p_SMR < (0.05/ .N) & p_HEIDI > 0.01])

(female_nalls_smr_hits <- female_nalls_smr[p_SMR < (0.05/ .N)])
(female_nalls_smr_hits_heidi <- female_nalls_smr[p_SMR < (0.05/ .N) & p_HEIDI > 0.01])

(male_blau_smr_hits <- male_blau_smr[p_SMR < (0.05/ .N)])
(male_blau_smr_hits_heidi <- male_blau_smr[p_SMR < (0.05/ .N) & p_HEIDI > 0.01])

(female_blau_smr_hits <- female_blau_smr[p_SMR < (0.05/ .N)])
(female_blau_smr_hits_heidi <- female_blau_smr[p_SMR < (0.05/ .N) & p_HEIDI > 0.01])
```

```{r}
display_venn <- function(x, ...) {
  library(VennDiagram)
  grid.newpage()
  venn_object <- venn.diagram(x, filename = NULL, ...)
  grid.draw(venn_object)
}

display_venn(list(
  cross=cross_smr_hits$probeID,
  male=male_nalls_smr_hits$probeID,
  female=female_nalls_smr_hits$probeID
), fill = c("gray80", "lightblue", "lightpink"))

display_venn(list(
  cross=cross_smr_hits_heidi$probeID,
  male=male_nalls_smr_hits_heidi$probeID,
  female=female_nalls_smr_hits_heidi$probeID
), fill = c("gray80", "lightblue", "lightpink"))

display_venn(list(
  cross=cross_smr_hits$probeID,
  male=male_blau_smr_hits$probeID,
  female=female_blau_smr_hits$probeID
), fill = c("gray80", "lightblue", "lightpink"))

display_venn(list(
  cross=cross_smr_hits_heidi$probeID,
  male=male_blau_smr_hits_heidi$probeID,
  female=female_blau_smr_hits_heidi$probeID
), fill = c("gray80", "lightblue", "lightpink"))


display_venn(list(
  male_nalls=male_nalls_smr_hits_heidi$probeID,
  female_nalls=female_nalls_smr_hits_heidi$probeID,
  male_blau=male_blau_smr_hits$probeID,
  female_blau=female_blau_smr_hits$probeID
), fill = c("lightblue2", "lightpink2","lightblue4","lightpink4"))

display_venn(list(
  male_nalls=male_nalls_smr_hits_heidi$probeID,
  female_nalls=female_nalls_smr_hits_heidi$probeID,
  male_blau=male_blau_smr_hits_heidi$probeID,
  female_blau=female_blau_smr_hits_heidi$probeID
), fill = c("lightblue2", "lightpink2","lightblue4","lightpink4"))
```
```{r}
to_plot <- rbind(cross_smr_hits,male_nalls_smr_hits,female_nalls_smr_hits, male_blau_smr_hits,female_blau_smr_hits)
ggplot(to_plot %>% mutate(`HEIDI Test`=ifelse(p_HEIDI > 0.01,"Passed","Failed")) %>%na.omit(),aes(set,fill=`HEIDI Test`))+
  geom_bar() + 
  scale_fill_manual(values=c("Passed"="gray20","Failed"="gray70"))+
  geom_text(stat="count",position="stack",mapping=aes(label=..count..),vjust=-0.25)+
  coord_cartesian(ylim=c(0,75)) +
  labs(y="Number of DNAm-mediated\nAssociations",x="")+
  theme_minimal() +
  theme(axis.text.x=element_text(angle=60,hjust=1,vjust=1))
```

```{r}
cross_smr_hits[is.na(p_HEIDI)]
```

